#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
'''
Initialize the event timestamps of each enrollment.
'''
def parse_args(argv=None):
    """Parse the command line and return the options as a plain dict.

    Args:
        argv: Argument strings to parse, without the program name.
            Defaults to ``sys.argv[1:]``.

    Returns:
        dict with the keys ``'log_path'`` and ``'output'``.

    Raises:
        SystemExit: raised by argparse after printing the help text
            (status 0) when no arguments are given or ``-h`` is passed,
            or after reporting a usage error (status 2).
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('log_path', help='CSV log file to read')
    parser.add_argument('output', help='path of the CSV file to write')

    # A bare invocation shows the full help text instead of a terse usage
    # error.  '-h' is passed to argparse explicitly rather than appended to
    # sys.argv, so the process-wide argument list is left untouched.
    if not argv:
        argv = ['-h']
    return vars(parser.parse_args(argv))

# Script body: read the log CSV, gather the timestamps of every event per
# enrollment, and write one row per enrollment whose 'event_<name>_date'
# columns hold the space-separated, chronologically sorted timestamps of
# that event ('overall' covers all events of the enrollment together).
args = parse_args()

time_format = "%Y-%m-%dT%H:%M:%S"

log_path, output = args['log_path'], args['output']

# The log must provide the columns 'enrollment_id', 'event' and 'time'.
log = pd.read_csv(log_path)
log['date'] = log.time.map(lambda x: d.datetime.strptime(x, time_format))

# enrollment_id -> {event name -> [datetime, ...]}; the extra 'overall'
# entry collects the timestamps of all events of that enrollment.
enroll_event_date = {}
for enrollment_id, event, date in log[["enrollment_id", "event", "date"]].values:
    per_event = enroll_event_date.get(enrollment_id)
    if per_event is None:
        per_event = enroll_event_date[enrollment_id] = {'overall': []}
    if event not in per_event:
        per_event[event] = []
    per_event['overall'].append(date)
    per_event[event].append(date)

enrollment_ids = set(log.enrollment_id)

enroll_fea = pd.DataFrame()
enroll_fea['enrollment_id'] = list(enrollment_ids)

event_set = set(log.event)
event_set.add('overall')

# Emit the event columns in sorted order: iterating the set directly would
# make the column order depend on string hashing, which can differ between
# runs.  key=str keeps the sort usable if a non-string value (such as a
# missing event read as NaN) is present.
for event in sorted(event_set, key=str):
    enroll_fea['event_%s_date' % event] = [
        ' '.join(
            date.strftime(time_format)
            for date in sorted(enroll_event_date[enrollment_id].get(event, [])))
        for enrollment_id in enroll_fea.enrollment_id.values
    ]

enroll_fea.to_csv(output, index=False)

